#Read in voter hstory data
vhist <- read_csv("data/2017_CO/VHist_2017/CO_2017_VHist_full.csv")
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## X1 = col_integer(),
## VOTER_ID = col_integer(),
## ELECTION_TYPE = col_character(),
## ELECTION_DATE = col_character(),
## ELECTION_DESCRIPTION = col_character(),
## VOTING_METHOD = col_character(),
## PARTY = col_character(),
## COUNTY_NAME = col_character()
## )
summary(as.factor(vhist$VOTING_METHOD))
## Absentee Carry Absentee Mail Early Voting
## 106196 13258152 1224179
## Early Voting - DRE In Person In Person - DRE
## 418364 187168 160042
## Mail Ballot Mail Ballot - DRE Polling Place
## 8397030 2840 8746351
## Vote Center Vote Center - DRE NA's
## 231354 181178 1269812
vhist_na <- vhist %>%
filter(is.na(VOTING_METHOD))
summary(as.factor(vhist_na$COUNTY_NAME))
## Adams Alamosa Archuleta Boulder Chaffee Conejos
## 11 1 2 6 5 2
## Delta Douglas Gilpin Gunnison Jackson Jefferson
## 1 2 104 79 915 1264424
## Kiowa Kit Carson La Plata Lake Las Animas Logan
## 382 4 93 1458 18 7
## Mineral Moffat Montezuma Morgan Otero Ouray
## 10 5 48 4 4 3
## Park Phillips Pitkin Rio Blanco Rio Grande Saguache
## 1 114 2 4 1 13
## San Miguel Summit Teller Yuma
## 2081 1 6 1
summary(as.factor(vhist$COUNTY_NAME))
## Adams Alamosa Arapahoe Archuleta Baca Bent
## 2053966 96137 3709094 86775 36137 31366
## Boulder Broomfield Chaffee Cheyenne Clear Creek Conejos
## 2689444 326236 133980 23503 82810 68632
## Costilla Crowley Custer Delta Denver Dolores
## 38063 27871 45622 229703 4287463 18342
## Douglas Eagle El Paso Elbert Fremont Garfield
## 1940459 248526 3567938 192444 291829 313742
## Gilpin Grand Gunnison Hinsdale Huerfano Jackson
## 40622 110281 99259 10511 57919 15082
## Jefferson Kiowa Kit Carson La Plata Lake Larimer
## 4558186 17634 51812 346228 43923 2287320
## Las Animas Lincoln Logan Mesa Mineral Moffat
## 106573 44428 142804 1142624 9986 102418
## Montezuma Montrose Morgan Otero Ouray Park
## 156604 288434 143654 114665 49152 122798
## Phillips Pitkin Prowers Pueblo Rio Blanco Rio Grande
## 44681 130499 81201 1038130 51655 75302
## Routt Saguache San Juan San Miguel Sedgwick Summit
## 168492 40928 7771 63924 27954 170536
## Teller Washington Weld Yuma
## 150301 39491 1411792 77010
summary(as.factor(year(mdy(vhist_na$ELECTION_DATE))))
## 1988 1990 1991 1992 1993 1994 1995 1996 1997 1998
## 78288 104370 12150 136783 53392 97355 52841 155341 65252 142150
## 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008
## 80369 211508 74738 446 118 1267 328 2147 938 31
summary(as.factor(vhist_na$ELECTION_TYPE))
## Coordinated General Primary Recall Special
## 327941 691002 236446 2272 12151
There appear to be several different counties here that have NA’s in the Voting Method column. Most NAs are in Jefferson. Admittedly, that is the most populous of counties, but it is still about 1 in 40 voters! At first glance, the number of NAs rises exponentially with the totals of voters per county.
Every single NA is concentrated before 2013, when it could be reasonaby supposed that at least local elections were conducted through VBM. The vast, VAST majority are before 2002.
The distribution of NAs across election types seems fairly consistent with the total amount of votes cast in each contest type. This is reasonable, since it suggests that counting method is independent of election type.
#Moving on with code as if problems were not there
#Sorting out voting methods
vhist <- vhist %>%
select(3:6, 8)
vhist$VOTING_METHOD[vhist$VOTING_METHOD == "Early Voting - DRE"] <- "Early Voting"
vhist$VOTING_METHOD[vhist$VOTING_METHOD == "In Person - DRE"] <- "In Person"
vhist$VOTING_METHOD[vhist$VOTING_METHOD == "Mail Ballot - DRE"] <- "Mail Ballot"
vhist$ELECTION_DATE <- mdy(vhist$ELECTION_DATE)
#More needed, don't know what ones to cut, will do the following##
#Create turnouts file with raw vote counts
turnouts <- vhist %>%
mutate(count = 1) %>%
group_by(COUNTY_NAME, ELECTION_DESCRIPTION, ELECTION_TYPE, ELECTION_DATE, VOTING_METHOD) %>%
summarize(total_votes = sum(count)) %>%
spread(key = VOTING_METHOD, value = total_votes) %>%
filter(year(ELECTION_DATE) >= 1992)
vrf <- read_csv("data/2017_CO/VRF_2017/CO_2017_VRF_full.csv",
col_types = cols_only(VOTER_ID = col_guess(), COUNTY = col_guess(), REGISTRATION_DATE = col_guess(), EFFECTIVE_DATE = col_guess(), VOTER_STATUS = col_guess()))
vrf$REGISTRATION_DATE <- mdy(vrf$REGISTRATION_DATE)
vrf$EFFECTIVE_DATE <- mdy(vrf$EFFECTIVE_DATE)
vrf <- vrf %>%
filter(year(REGISTRATION_DATE) > 1910 && year(EFFECTIVE_DATE) > 1910) %>%
filter(VOTER_STATUS == "Active")
col_county_registrants <- vrf %>%
mutate(count = 1, YEAR_REGISTERED = year(REGISTRATION_DATE)) %>%
group_by(COUNTY, YEAR_REGISTERED) %>%
summarize(REGISTERED_IN_YEAR = sum(count))
col_county_registrants <- col_county_registrants %>%
group_by(COUNTY) %>%
mutate(REGISTRANTS = cumsum(REGISTERED_IN_YEAR)) %>%
filter(YEAR_REGISTERED >= 1992)
#Graphwith test data
years <- data.frame(as.integer(seq(1990, 2017, 1)), runif(28, 1, 30))
names(years) <- c("year", "dat")
ggplot(years, aes(x = year, y = dat)) +
geom_vline(xintercept = 1992, col = "red") +
geom_vline(xintercept = 2008, col = "red") +
geom_vline(xintercept = 2013, col = "red") +
geom_smooth() +
geom_point()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
col <- map_data("county") %>%
filter(region == 'colorado')
county_registrants <- vrf %>%
mutate(count = 1) %>%
group_by(COUNTY) %>%
summarise(TOTAL_REGISTERED = sum(count))
county_registrants$COUNTY <- tolower(county_registrants$COUNTY) # matching string
col_reg <- merge(col, county_registrants, by.x = "subregion", by.y = "COUNTY")
col_reg$pop_cat <- cut(col_reg$TOTAL_REGISTERED, breaks = c(seq(0, 500000, by = 100000)), labels=1:5)
p <- col_reg %>%
group_by(group) %>%
plot_ly(x = ~long, y = ~lat, color = ~pop_cat, colors = c('pink', 'dark red'),
text = ~subregion, hoverinfo = 'text') %>%
add_polygons(line = list(width = 0.4)) %>%
add_polygons(
fillcolor = 'transparent',
line = list(color = 'black', width = 0.5),
showlegend = FALSE, hoverinfo = 'none'
) %>%
layout(
title = "Colorado Total Registrants by County",
titlefont = list(size = 10),
xaxis = list(title = "", showgrid = FALSE,
zeroline = FALSE, showticklabels = FALSE),
yaxis = list(title = "", showgrid = FALSE,
zeroline = FALSE, showticklabels = FALSE)
)
p